library(tidyverse)
library(readxl)
library(janitor)
library(scales)
library(kableExtra)

0 — Configuration & Crosswalk

v2.1 Change: 9 programs across all 6 SHRS departments, each with a unique SOC code. SS (Exercise Physiologists) restored; OT-CScD duplicate removed.

# —— EDIT THIS ONE LINE to match your machine ——
MARKET_ROOT <- file.path(path.expand("~"), "Documents", "SHRS_Analysis",
                          "market_analysis")

# Skills data paths
BLS_SKILLS_PATH  <- file.path(MARKET_ROOT, "skills", "bls", "skills.xlsx")
ONET_SKILLS_PATH <- file.path(MARKET_ROOT, "skills", "onet", "Skills-2.xlsx")
ONET_EDU_PATH    <- file.path(MARKET_ROOT, "skills", "onet",
                               "Education__Training__and_Experience.xlsx")

# Full crosswalk (9 programs, 9 unique SOC codes)
soc_crosswalk <- tribble(
  ~shrs_program, ~shrs_dept, ~soc_code, ~occupation_title,               ~pitt_degree,
  "SLP",         "CSD",      "29-1127", "Speech-Language Pathologists",   "Master's",
  "AuD",         "CSD",      "29-1181", "Audiologists",                   "Doctoral",
  "HIM",         "HIM",      "29-9021", "Health Information Technologists","Master's",
  "OTD",         "OT",       "29-1122", "Occupational Therapists",        "Doctoral",
  "DPT",         "PT",       "29-1123", "Physical Therapists",            "Doctoral",
  "PAS",         "PAS",      "29-1071", "Physician Assistants",           "Master's",
  "AT",          "SMN",      "29-9091", "Athletic Trainers",              "Master's",
  "DN",          "SMN",      "29-1031", "Dietitians and Nutritionists",   "Master's",
  "SS",          "SMN",      "29-1128", "Exercise Physiologists",         "Master's"
)

# All SOC codes are unique — no dedup needed
target_socs <- soc_crosswalk$soc_code

# Color palette for all 9 programs
program_colors <- c(
  "SLP" = "#2c7bb6", "AuD" = "#abd9e9",
  "HIM" = "#756bb1",
  "OTD" = "#e6550d",
  "DPT" = "#31a354",
  "PAS" = "#de2d26",
  "AT"  = "#fdae61", "DN"  = "#d7191c",
  "SS"  = "#1a9641"
)

1 — BLS Skill Profiles (Broad View)

BLS Table 6.5 provides percentile ranks (0–100) across 17 skill dimensions for each occupation. Higher = that skill is more important relative to all other occupations in the economy.

bls_skills_wide <- read_excel(BLS_SKILLS_PATH, sheet = "Table 6.5", skip = 1,
                               col_types = "text") |>
  clean_names()

# Rename first two columns by position
names(bls_skills_wide)[1] <- "occupation_title"
names(bls_skills_wide)[2] <- "soc_code"

# Filter to our targets and pivot long
bls_skills <- bls_skills_wide |>
  filter(soc_code %in% target_socs) |>
  pivot_longer(cols = -c(occupation_title, soc_code),
               names_to = "skill", values_to = "percentile") |>
  mutate(
    percentile = as.numeric(percentile),
    skill = str_replace_all(skill, "_", " ") |> str_to_title()
  ) |>
  left_join(soc_crosswalk |> select(shrs_program, shrs_dept, soc_code, occupation_title), by = "soc_code")

1.1 Skill Heatmap — All Programs

bls_skills |>
  ggplot(aes(x = shrs_program, y = reorder(skill, percentile), fill = percentile)) +
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(aes(label = percentile), size = 2.8, color = "black") +
  scale_fill_gradient2(low = "#d73027", mid = "#fee08b", high = "#1a9850",
                        midpoint = 50, limits = c(0, 100),
                        name = "Percentile\nRank") +
  labs(
    title = "BLS Skill Percentile Ranks by SHRS Program",
    subtitle = "0 = lowest across all occupations; 100 = highest | 9 programs, 9 unique SOC codes",
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(face = "bold", angle = 30, hjust = 1),
        panel.grid = element_blank())

1.2 Skill Profiles by Program

bls_skills |>
  ggplot(aes(x = reorder(skill, percentile), y = percentile,
             fill = shrs_program)) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "gray50") +
  coord_flip() +
  facet_wrap(~ shrs_program, ncol = 2) +
  scale_fill_manual(values = program_colors) +
  scale_y_continuous(limits = c(0, 100)) +
  labs(
    title = "Skill Profiles by SHRS Program (BLS Percentile Ranks)",
    subtitle = "Dashed line = 50th percentile (median across all occupations)",
    x = NULL, y = "Percentile Rank (0–100)"
  ) +
  theme_minimal(base_size = 10) +
  theme(legend.position = "none")

1.3 Top 3 Skills per Program

bls_skills |>
  group_by(shrs_program) |>
  slice_max(percentile, n = 3) |>
  arrange(shrs_program, desc(percentile)) |>
  select(Program = shrs_program, Skill = skill, Percentile = percentile) |>
  kable() |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Program Skill Percentile
AT Adaptability 94
AT Interpersonal 94
AT Problem Solving And Decision Making 92
AuD Customer Service 98
AuD Interpersonal 98
AuD Computers And Information Technology 96
DN Interpersonal 92
DN Science 92
DN Project Management 87
DN Writing And Reading 87
DPT Interpersonal 98
DPT Problem Solving And Decision Making 95
DPT Customer Service 92
HIM Computers And Information Technology 99
HIM Mathematics 89
HIM Critical And Analytical Thinking 86
OTD Interpersonal 98
OTD Adaptability 96
OTD Creativity And Innovation 90
OTD Problem Solving And Decision Making 90
PAS Adaptability 100
PAS Interpersonal 99
PAS Problem Solving And Decision Making 98
SLP Adaptability 99
SLP Interpersonal 97
SLP Critical And Analytical Thinking 93
SLP Problem Solving And Decision Making 93
SS Interpersonal 94
SS Writing And Reading 87
SS Science 86

1.4 Distinguishing Skills

Which skills most differentiate each program from the average of the other programs?

n_other <- length(unique(target_socs)) - 1

bls_diff <- bls_skills |>
  group_by(skill) |>
  mutate(
    others_mean = (sum(percentile) - percentile) / n_other
  ) |>
  ungroup() |>
  mutate(diff_from_others = percentile - others_mean)

bls_diff |>
  group_by(shrs_program) |>
  slice_max(abs(diff_from_others), n = 5) |>
  arrange(shrs_program, desc(diff_from_others)) |>
  select(Program = shrs_program, Skill = skill,
         Percentile = percentile, `Others Avg` = others_mean,
         `Difference` = diff_from_others) |>
  mutate(across(where(is.numeric), ~ round(.x, 1))) |>
  kable() |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Program Skill Percentile Others Avg Difference
AT Detail Oriented 83 46.8 36.2
AT Physical Strength And Stamina 70 40.8 29.2
AT Critical And Analytical Thinking 63 85.9 -22.9
AT Writing And Reading 62 86.1 -24.1
AT Mathematics 16 61.5 -45.5
AuD Mechanical 64 27.8 36.2
AuD Customer Service 98 67.4 30.6
AuD Computers And Information Technology 96 69.1 26.9
AuD Fine Motor 60 33.4 26.6
AuD Project Management 84 60.1 23.9
DN Mathematics 85 52.9 32.1
DN Adaptability 55 83.0 -28.0
DN Physical Strength And Stamina 16 47.5 -31.5
DN Detail Oriented 21 54.5 -33.5
DN Fine Motor 4 40.4 -36.4
DPT Physical Strength And Stamina 80 39.5 40.5
DPT Fine Motor 58 33.6 24.4
DPT Customer Service 92 68.1 23.9
DPT Leadership 84 67.5 16.5
DPT Computers And Information Technology 51 74.8 -23.8
HIM Adaptability 43 84.5 -41.5
HIM Physical Strength And Stamina 4 49.0 -45.0
HIM Customer Service 29 76.0 -47.0
HIM Problem Solving And Decision Making 39 86.5 -47.5
HIM Interpersonal 36 96.2 -60.2
OTD Creativity And Innovation 90 60.1 29.9
OTD Adaptability 96 77.9 18.1
OTD Detail Oriented 32 53.1 -21.1
OTD Computers And Information Technology 53 74.5 -21.5
OTD Mathematics 26 60.2 -34.2
PAS Detail Oriented 97 45.0 52.0
PAS Mathematics 79 53.6 25.4
PAS Computers And Information Technology 93 69.5 23.5
PAS Adaptability 100 77.4 22.6
PAS Creativity And Innovation 39 66.5 -27.5
SLP Adaptability 99 77.5 21.5
SLP Mathematics 39 58.6 -19.6
SLP Physical Strength And Stamina 25 46.4 -21.4
SLP Leadership 50 71.8 -21.8
SLP Project Management 19 68.2 -49.2
SS Physical Strength And Stamina 65 41.4 23.6
SS Problem Solving And Decision Making 63 83.5 -20.5
SS Detail Oriented 27 53.8 -26.8
SS Adaptability 56 82.9 -26.9
SS Project Management 32 66.6 -34.6

2 — O*NET Detailed Skill Breakdown

O*NET provides 35 specific skills scored on importance (1–5 scale) and level (1–7 scale). We focus on importance scores to understand what matters most for each occupation.

onet_raw <- read_excel(ONET_SKILLS_PATH, sheet = "Skills") |>
  clean_names()

# Filter: our target SOCs, importance scores only, not suppressed
onet_skills <- onet_raw |>
  mutate(soc_code = str_sub(o_net_soc_code, 1, 7)) |>
  filter(soc_code %in% target_socs,
         scale_id == "IM",
         recommend_suppress != "Y" | is.na(recommend_suppress)) |>
  select(soc_code, title, skill = element_name,
         importance = data_value) |>
  left_join(soc_crosswalk |> select(shrs_program, shrs_dept, soc_code, occupation_title), by = "soc_code")

2.1 Top 10 Skills by Program (Importance)

onet_skills |>
  group_by(shrs_program) |>
  slice_max(importance, n = 10) |>
  ggplot(aes(x = reorder(skill, importance), y = importance,
             fill = shrs_program)) +
  geom_col(width = 0.7) +
  coord_flip() +
  facet_wrap(~ shrs_program, ncol = 2, scales = "free_y") +
  scale_fill_manual(values = program_colors) +
  scale_y_continuous(limits = c(0, 5)) +
  labs(
    title = "Top 10 Most Important Skills by SHRS Program",
    subtitle = "O*NET Importance Score (1–5 scale)",
    x = NULL, y = "Importance"
  ) +
  theme_minimal(base_size = 10) +
  theme(legend.position = "none")

2.2 Full O*NET Skill Heatmap

onet_skills |>
  ggplot(aes(x = shrs_program, y = reorder(skill, importance), fill = importance)) +
  geom_tile(color = "white", linewidth = 0.3) +
  geom_text(aes(label = round(importance, 1)), size = 2.5) +
  scale_fill_gradient2(low = "#d73027", mid = "#fee08b", high = "#1a9850",
                        midpoint = 2.5, limits = c(1, 5),
                        name = "Importance\n(1–5)") +
  labs(
    title = "O*NET Skill Importance Across SHRS Programs",
    subtitle = "35 skills rated by importance (1 = not important, 5 = extremely important)",
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(face = "bold", angle = 30, hjust = 1),
        panel.grid = element_blank())

2.3 Skill Clusters

Group the 35 O*NET skills into functional clusters to see where each program’s strengths concentrate.

# Define skill clusters
cluster_map <- tribble(
  ~skill,                              ~cluster,
  "Active Listening",                  "Communication",
  "Speaking",                          "Communication",
  "Writing",                           "Communication",
  "Reading Comprehension",             "Communication",
  "Social Perceptiveness",             "Interpersonal",
  "Coordination",                      "Interpersonal",
  "Persuasion",                        "Interpersonal",
  "Negotiation",                       "Interpersonal",
  "Instructing",                       "Interpersonal",
  "Service Orientation",               "Interpersonal",
  "Critical Thinking",                 "Analytical",
  "Complex Problem Solving",           "Analytical",
  "Judgment and Decision Making",      "Analytical",
  "Systems Analysis",                  "Analytical",
  "Systems Evaluation",                "Analytical",
  "Operations Analysis",               "Analytical",
  "Active Learning",                   "Learning & Adaptability",
  "Learning Strategies",               "Learning & Adaptability",
  "Monitoring",                        "Learning & Adaptability",
  "Time Management",                   "Management",
  "Management of Personnel Resources", "Management",
  "Management of Material Resources",  "Management",
  "Management of Financial Resources", "Management",
  "Quality Control Analysis",          "Technical",
  "Technology Design",                 "Technical",
  "Equipment Selection",               "Technical",
  "Installation",                      "Technical",
  "Programming",                       "Technical",
  "Operations Monitoring",             "Technical",
  "Operation and Control",             "Technical",
  "Equipment Maintenance",             "Technical",
  "Troubleshooting",                   "Technical",
  "Repairing",                         "Technical",
  "Mathematics",                       "Science & Math",
  "Science",                           "Science & Math"
)

onet_clustered <- onet_skills |>
  left_join(cluster_map, by = "skill") |>
  filter(!is.na(cluster)) |>
  group_by(shrs_program, cluster) |>
  summarise(avg_importance = mean(importance, na.rm = TRUE), .groups = "drop")

onet_clustered |>
  ggplot(aes(x = cluster, y = avg_importance, fill = shrs_program)) +
  geom_col(position = "dodge", width = 0.7) +
  scale_fill_manual(values = program_colors) +
  scale_y_continuous(limits = c(0, 5)) +
  labs(
    title = "Average Skill Importance by Cluster",
    subtitle = "O*NET skills grouped into functional categories",
    x = NULL, y = "Average Importance (1–5)", fill = "Program"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

onet_clustered |>
  ggplot(aes(x = shrs_program, y = cluster, fill = avg_importance)) +
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(aes(label = round(avg_importance, 2)), size = 3.2) +
  scale_fill_gradient2(low = "#d73027", mid = "#fee08b", high = "#1a9850",
                        midpoint = 2.5, limits = c(1, 5),
                        name = "Avg\nImportance") +
  labs(
    title = "Skill Cluster Heatmap by SHRS Program",
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(face = "bold", angle = 30, hjust = 1),
        panel.grid = element_blank())


3 — Education & Training Pathways

What education, experience, and training does the labor market actually require for each SHRS occupation? This directly informs whether program structures are aligned with market expectations.

edu_raw <- read_excel(ONET_EDU_PATH,
                       sheet = "Education, Training, and Experi") |>
  clean_names()

edu_raw <- edu_raw |>
  mutate(soc_code = str_sub(o_net_soc_code, 1, 7)) |>
  filter(soc_code %in% target_socs) |>
  left_join(soc_crosswalk |> select(shrs_program, shrs_dept, soc_code, occupation_title), by = "soc_code")

3.1 Education Level Distribution

What percentage of workers in each occupation hold each education level?

edu_labels <- tribble(
  ~category, ~edu_level,
  1,  "Less than HS",
  2,  "HS Diploma/GED",
  3,  "Post-secondary Certificate",
  4,  "Some College",
  5,  "Associate's Degree",
  6,  "Bachelor's Degree",
  7,  "Post-baccalaureate Certificate",
  8,  "Master's Degree",
  9,  "Post-master's Certificate",
  10, "First Professional Degree",
  11, "Doctoral Degree",
  12, "Post-doctoral Training"
)

edu_dist <- edu_raw |>
  filter(element_id == "2.D.1", scale_id == "RL") |>
  mutate(category = as.integer(category),
         pct = as.numeric(data_value)) |>
  filter(pct > 0) |>
  left_join(edu_labels, by = "category") |>
  mutate(edu_level = factor(edu_level, levels = edu_labels$edu_level))

edu_dist |>
  ggplot(aes(x = shrs_program, y = pct, fill = edu_level)) +
  geom_col(width = 0.7) +
  scale_fill_viridis_d(option = "D", direction = -1, name = "Education Level") +
  labs(
    title = "Education Level Distribution by SHRS Occupation",
    subtitle = "Percent of workers at each education level (O*NET incumbent survey)",
    x = NULL, y = "Percent of Workers (%)"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "right",
        axis.text.x = element_text(angle = 30, hjust = 1))

edu_dist |>
  select(Program = shrs_program, `Education Level` = edu_level,
         `% of Workers` = pct) |>
  mutate(`% of Workers` = round(`% of Workers`, 1)) |>
  kable() |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Program Education Level % of Workers
DN Associate’s Degree 3.3
DN Bachelor’s Degree 10.0
DN Post-baccalaureate Certificate 53.3
DN Master’s Degree 33.3
PAS Bachelor’s Degree 5.4
PAS Master’s Degree 81.1
PAS First Professional Degree 13.5
PAS HS Diploma/GED 21.8
PAS Post-secondary Certificate 16.8
PAS Associate’s Degree 4.1
PAS Master’s Degree 41.7
PAS Post-master’s Certificate 1.1
PAS First Professional Degree 14.5
OTD Bachelor’s Degree 13.6
OTD Master’s Degree 86.4
OTD Bachelor’s Degree 17.4
OTD Post-baccalaureate Certificate 21.7
OTD Master’s Degree 56.5
OTD Post-master’s Certificate 4.3
DPT Associate’s Degree 6.3
DPT Bachelor’s Degree 8.3
DPT Master’s Degree 38.3
DPT Doctoral Degree 47.1
SLP Master’s Degree 88.5
SLP Post-master’s Certificate 11.5
SS Bachelor’s Degree 59.1
SS Master’s Degree 31.8
SS Doctoral Degree 9.1
AuD Doctoral Degree 95.4
AuD Post-doctoral Training 4.6
AT HS Diploma/GED 8.3
AT Bachelor’s Degree 37.4
AT Master’s Degree 54.3

3.3 On-the-Job Training Requirements

ojt_labels <- tribble(
  ~category, ~training,
  1, "None",
  2, "1 day to 1 month",
  3, "1–3 months",
  4, "3–6 months",
  5, "6 months–1 year",
  6, "1–2 years",
  7, "2–4 years",
  8, "4–10 years",
  9, "Over 10 years"
)

ojt_dist <- edu_raw |>
  filter(element_id == "3.A.3", scale_id == "OJ") |>
  mutate(category = as.integer(category),
         pct = as.numeric(data_value)) |>
  filter(pct > 0) |>
  left_join(ojt_labels, by = "category") |>
  mutate(training = factor(training, levels = ojt_labels$training))

if (nrow(ojt_dist) > 0) {
  ojt_dist |>
    ggplot(aes(x = shrs_program, y = pct, fill = training)) +
    geom_col(width = 0.7) +
    scale_fill_viridis_d(option = "B", direction = -1, name = "OJT Duration") +
    labs(
      title = "On-the-Job Training Requirements by SHRS Occupation",
      subtitle = "O*NET incumbent survey",
      x = NULL, y = "Percent of Workers (%)"
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "right",
          axis.text.x = element_text(angle = 30, hjust = 1))
}


4 — Skills & Education Synthesis

Bringing together the skill profiles and education pathways into a single program-level summary.

# Dominant education level per program
dominant_edu <- edu_dist |>
  group_by(shrs_program) |>
  slice_max(pct, n = 1) |>
  select(shrs_program, dominant_edu = edu_level, edu_pct = pct)

# Top skill cluster per program
top_cluster <- onet_clustered |>
  group_by(shrs_program) |>
  slice_max(avg_importance, n = 1) |>
  select(shrs_program, top_cluster = cluster, cluster_importance = avg_importance)

# Top BLS skill per program
top_bls_skill <- bls_skills |>
  group_by(shrs_program) |>
  slice_max(percentile, n = 1) |>
  select(shrs_program, top_bls_skill = skill, top_percentile = percentile)

# Average O*NET importance (overall skill intensity)
avg_intensity <- onet_skills |>
  group_by(shrs_program) |>
  summarise(avg_skill_importance = round(mean(importance, na.rm = TRUE), 2),
            .groups = "drop")

# Combine
synthesis <- soc_crosswalk |>
  select(shrs_program, shrs_dept) |>
  left_join(dominant_edu, by = "shrs_program") |>
  left_join(top_cluster, by = "shrs_program") |>
  left_join(top_bls_skill, by = "shrs_program") |>
  left_join(avg_intensity, by = "shrs_program")
synthesis |>
  select(
    Program            = shrs_program,
    Dept               = shrs_dept,
    `Primary Education`  = dominant_edu,
    `% at that Level`    = edu_pct,
    `Top Skill Cluster`  = top_cluster,
    `Top BLS Skill`      = top_bls_skill,
    `Skill Intensity`    = avg_skill_importance
  ) |>
  kable(digits = 1) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Program Dept Primary Education % at that Level Top Skill Cluster Top BLS Skill Skill Intensity
SLP CSD Master’s Degree 88.5 Communication Adaptability 2.8
AuD CSD Doctoral Degree 95.4 Communication Customer Service 3.0
AuD CSD Doctoral Degree 95.4 Communication Interpersonal 3.0
HIM HIM NA NA NA Computers And Information Technology NA
OTD OT Master’s Degree 86.4 Communication Interpersonal 2.8
DPT PT Doctoral Degree 47.1 Communication Interpersonal 2.8
PAS PAS Master’s Degree 81.1 Communication Adaptability 2.8
AT SMN Master’s Degree 54.3 Communication Adaptability 2.8
AT SMN Master’s Degree 54.3 Communication Interpersonal 2.8
DN SMN Post-baccalaureate Certificate 53.3 Communication Interpersonal 2.9
DN SMN Post-baccalaureate Certificate 53.3 Communication Science 2.9
SS SMN Bachelor’s Degree 59.1 Communication Interpersonal 2.8

4.1 Program Positioning Map

# Use skill intensity vs education level as a 2D positioning
edu_numeric <- tribble(
  ~dominant_edu,                      ~edu_years,
  "Less than HS",                     10,
  "HS Diploma/GED",                   12,
  "Post-secondary Certificate",       13,
  "Some College",                     14,
  "Associate's Degree",               14,
  "Bachelor's Degree",                16,
  "Post-baccalaureate Certificate",   17,
  "Master's Degree",                  18,
  "Post-master's Certificate",        19,
  "First Professional Degree",        20,
  "Doctoral Degree",                  21,
  "Post-doctoral Training",           22
)

positioning <- synthesis |>
  left_join(edu_numeric, by = "dominant_edu")

positioning |>
  ggplot(aes(x = edu_years, y = avg_skill_importance,
             color = shrs_program, label = shrs_program)) +
  geom_point(size = 5) +
  geom_text(vjust = -1.2, size = 4.5, fontface = "bold") +
  scale_color_manual(values = program_colors) +
  scale_x_continuous(
    breaks = c(14, 16, 18, 21),
    labels = c("Associate's", "Bachelor's", "Master's", "Doctoral")
  ) +
  labs(
    title = "Program Positioning: Education Level vs Skill Intensity",
    subtitle = "Where each SHRS program sits in terms of training investment and skill demands",
    x = "Dominant Education Level", y = "Average Skill Importance (O*NET 1–5)"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")


5 — Cross-Program Skill Comparison (NEW in v2)

With 9 programs now in scope, we can do richer cross-program comparisons to identify shared skill foundations and unique differentiators.

5.1 Programs Clustered by Skill Similarity

# Build a program × skill matrix for BLS skills
skill_matrix <- bls_skills |>
  select(shrs_program, skill, percentile) |>
  pivot_wider(names_from = skill, values_from = percentile)

# Compute distance and cluster
if (nrow(skill_matrix) >= 3) {
  skill_dist <- dist(skill_matrix |> select(-shrs_program))
  skill_clust <- hclust(skill_dist, method = "ward.D2")

  # Label with program names
  skill_clust$labels <- skill_matrix$shrs_program

  plot(skill_clust, main = "SHRS Programs Clustered by BLS Skill Similarity",
       sub = "Ward's method on 17-dimension BLS percentile vectors",
       xlab = "", ylab = "Distance")
}

5.2 Shared vs Unique Skill Foundations

# Skills where ALL programs score above 50th percentile
universal_skills <- bls_skills |>
  group_by(skill) |>
  summarise(
    min_pctl = min(percentile),
    max_pctl = max(percentile),
    avg_pctl = mean(percentile),
    .groups = "drop"
  ) |>
  filter(min_pctl >= 50) |>
  arrange(desc(avg_pctl))

if (nrow(universal_skills) > 0) {
  cat("Skills where ALL SHRS programs score above the 50th percentile:\n\n")
  universal_skills |>
    select(Skill = skill, `Min Percentile` = min_pctl,
           `Max Percentile` = max_pctl, `Average` = avg_pctl) |>
    kable(digits = 0) |>
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
} else {
  cat("No skills universally above the 50th percentile across all programs.\n")
}
## Skills where ALL SHRS programs score above the 50th percentile:
Skill Min Percentile Max Percentile Average
Science 65 96 83
Writing And Reading 62 94 83
Critical And Analytical Thinking 63 95 83
Speaking And Listening 63 89 78
Computers And Information Technology 51 99 72
Leadership 50 84 69
# Skills most unique to each program (biggest positive difference from others)
bls_diff |>
  group_by(shrs_program) |>
  slice_max(diff_from_others, n = 1) |>
  select(Program = shrs_program, `Signature Skill` = skill,
         Percentile = percentile, `Others Avg` = others_mean,
         `Advantage` = diff_from_others) |>
  mutate(across(where(is.numeric), ~ round(.x, 1))) |>
  kable() |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Program Signature Skill Percentile Others Avg Advantage
AT Detail Oriented 83 46.8 36.2
AuD Mechanical 64 27.8 36.2
DN Mathematics 85 52.9 32.1
DPT Physical Strength And Stamina 80 39.5 40.5
HIM Mathematics 89 52.4 36.6
OTD Creativity And Innovation 90 60.1 29.9
PAS Detail Oriented 97 45.0 52.0
SLP Adaptability 99 77.5 21.5
SS Physical Strength And Stamina 65 41.4 23.6

Appendix: Data Sources

  • BLS Skills Data: Table 6.5 from Employment Projections program — percentile ranks across 17 skill dimensions
  • O*NET Skills: 35 detailed skills with importance and level ratings from incumbent worker surveys
  • O*NET Education, Training & Experience: Education distributions, work experience requirements, and on-the-job training from incumbent surveys

v2.1 Notes:

  • 9 programs, each with a unique SOC code
  • Programs: SLP, AuD, HIM, OTD, DPT, PAS, AT, DN, SS
  • SS (Exercise Physiologists, 29-1128) restored
  • OT-CScD duplicate removed (shared SOC 29-1122 with OTD)
  • Cross-program clustering analysis in Section 5